# Geometries və Advanced Aesthetics
# Build a reproducible synthetic dataset of students.
# Every column is generated inside a single data.frame() call, so the random
# draws happen in column order and the fixed seed reproduces the same data.
set.seed(2024)
n <- 200
academic_data <- data.frame(
  student_id = seq_len(n),
  age = sample(18:25, size = n, replace = TRUE),
  gender = sample(c("Male", "Female"), size = n, replace = TRUE),
  faculty = sample(
    c("Engineering", "Medicine", "Economics", "IT"),
    size = n, replace = TRUE
  ),
  math_score = round(rnorm(n, mean = 75, sd = 15), digits = 1),
  physics_score = round(rnorm(n, mean = 72, sd = 16), digits = 1),
  gpa = round(runif(n, min = 2.0, max = 4.0), digits = 2),
  # pmax() floors the simulated weekly study time at 5 hours.
  study_hours_week = round(pmax(5, rnorm(n, mean = 30, sd = 10)), digits = 1),
  life_satisfaction = round(runif(n, min = 1, max = 10), digits = 1),
  stress_level = round(runif(n, min = 1, max = 10), digits = 1),
  stringsAsFactors = FALSE
)

# Derived columns: combined exam score and a GPA band (right-closed bins).
academic_data[["total_score"]] <- with(
  academic_data,
  math_score + physics_score
)
academic_data[["performance_level"]] <- cut(
  academic_data[["gpa"]],
  breaks = c(0, 2.5, 3.0, 3.5, 4.0),
  labels = c("Below Average", "Average", "Good", "Excellent")
)

# Report the number of generated rows; the console output is echoed below.
cat("Dataset yaradıldı:", nrow(academic_data), "tələbə\n")
#> Dataset yaradıldı: 200 tələbə
#> student_id age gender faculty math_score physics_score gpa
#> 1 1 19 Male Medicine 85.7 84.4 2.34
#> 2 2 22 Male Engineering 65.5 60.3 2.56
#> 3 3 22 Male IT 70.2 84.8 3.76
#> study_hours_week life_satisfaction stress_level total_score performance_level
#> 1 28.2 5.3 7.3 170.1 Below Average
#> 2 28.0 8.5 4.9 125.8 Average
#> 3 41.0 3.8 6.5 155.0 Excellent
# Basic scatter plot: GPA against math score, with a least-squares line.
with(
  academic_data,
  plot(
    x = math_score, y = gpa,
    main = "Math Score vs GPA",
    xlab = "Math Score", ylab = "GPA",
    pch = 16, col = "steelblue", cex = 0.8
  )
)
# abline() reads the intercept and slope from the fitted model.
abline(
  reg = lm(gpa ~ math_score, data = academic_data),
  col = "red", lwd = 2
)
#> ggplot2 code:
#> ggplot(data, aes(x = math_score, y = gpa)) +
#> geom_point() + geom_smooth(method = 'lm')
# Multi-dimensional plot: colour encodes faculty, point size encodes weekly
# study hours.
faculty_factor <- factor(academic_data$faculty)
faculty_levels <- levels(faculty_factor)
faculty_colors <- rainbow(length(faculty_levels))
# Integer codes follow the factor's (sorted) level order.
faculty_numeric <- as.numeric(faculty_factor)
plot(academic_data$math_score, academic_data$gpa,
  main = "Multi-dimensional Visualization",
  xlab = "Math Score", ylab = "GPA",
  col = faculty_colors[faculty_numeric],
  pch = 16, cex = academic_data$study_hours_week / 25
)
# The legend labels must use the factor level order, not the order in which
# faculties first appear in the data: colours are indexed by the factor codes,
# so any other order pairs labels with the wrong colours.
legend("bottomright",
  legend = faculty_levels,
  col = faculty_colors, pch = 16, cex = 0.7
)
#>
#> ggplot2 multi-dimensional:
#> ggplot(data, aes(x = math_score, y = gpa)) +
#> geom_point(aes(color = faculty, size = study_hours_week))
# Faculty distribution: one bar per faculty, height = number of students.
faculty_counts <- table(academic_data[["faculty"]])
barplot(
  height = faculty_counts,
  main = "Students by Faculty",
  col = rainbow(length(faculty_counts)),
  las = 2 # axis labels perpendicular to the axis
)
#> ggplot2 bar chart:
#> ggplot(data, aes(x = faculty)) + geom_bar()
# Performance levels: share of students in each GPA band.
# table() on a factor returns counts in level order, which is the order the
# four slice colours below are listed in.
performance_counts <- table(academic_data[["performance_level"]])
pie(
  x = performance_counts,
  main = "Performance Distribution",
  col = c("red", "orange", "lightgreen", "darkgreen")
)
# Time series simulation
# Simulate a six-semester GPA random walk for five randomly chosen students.
sample_students <- sample(academic_data$student_id, 5)
plot(1,
  type = "n", xlim = c(1, 6), ylim = c(2, 4),
  main = "GPA Progression Simulation",
  xlab = "Semester", ylab = "GPA"
)
colors <- rainbow(length(sample_students))
for (i in seq_along(sample_students)) {
  # Start from the GPA of the sampled student. The previous version indexed
  # rows 1..5 directly, so the random sample was never actually used.
  student_row <- match(sample_students[i], academic_data$student_id)
  gpa_trend <- academic_data$gpa[student_row] + cumsum(rnorm(6, 0, 0.1))
  lines(1:6, gpa_trend, col = colors[i], lwd = 2, type = "b")
}
#>
#> ggplot2 line plot:
#> ggplot(data, aes(x = semester, y = gpa, color = student_id)) +
#> geom_line() + geom_point()
# Color gradients: total score against stress, coloured by life satisfaction.
# Satisfaction is cut into 10 equal-width bins; indexing the palette with the
# resulting factor uses its integer bin codes.
satisfaction_palette <- heat.colors(10)
satisfaction_bin <- cut(academic_data$life_satisfaction, breaks = 10)
plot(academic_data$total_score, academic_data$stress_level,
  main = "Score vs Stress (Color: Life Satisfaction)",
  xlab = "Total Score", ylab = "Stress Level",
  col = satisfaction_palette[satisfaction_bin],
  pch = 16, cex = 1.2
)
# Add legend
# Take the swatches from the same 10-colour palette used for the points,
# evenly spaced from the lowest to the highest bin. A separate heat.colors(5)
# palette would show colours that do not occur in the plot.
legend_idx <- round(seq(1, length(satisfaction_palette), length.out = 5))
legend_colors <- satisfaction_palette[legend_idx]
legend("topright",
  legend = c("Low", "", "Med", "", "High"),
  col = legend_colors, pch = 16, title = "Satisfaction"
)
#> ggplot2 color gradient:
#> ggplot(data, aes(x = total_score, y = stress_level)) +
#> geom_point(aes(color = life_satisfaction)) +
#> scale_color_gradient2(low = 'red', high = 'green')
# Shape mapping: math against physics score, plotting symbol chosen by gender.
# The named vector acts as a lookup table from gender label to pch code.
gender_shapes <- c("Male" = 16, "Female" = 17)
with(
  academic_data,
  plot(
    x = math_score, y = physics_score,
    main = "Math vs Physics (Shape: Gender)",
    xlab = "Math Score", ylab = "Physics Score",
    pch = gender_shapes[gender],
    col = "darkblue", cex = 1.2
  )
)
legend(
  "bottomright",
  legend = names(gender_shapes),
  pch = gender_shapes,
  title = "Gender"
)
#>
#> ggplot2 shape mapping:
#> ggplot(data, aes(x = math_score, y = physics_score)) +
#> geom_point(aes(shape = gender))
# Transparency for overplotting: the same scatter drawn side by side with
# opaque and with semi-transparent points.
# par() returns the previous settings when it changes them; keep them so the
# single-panel layout can be restored after this comparison.
old_par <- par(mfrow = c(1, 2))
plot(academic_data$age, academic_data$gpa,
  main = "Without Transparency",
  xlab = "Age", ylab = "GPA",
  pch = 16, col = "red"
)
plot(academic_data$age, academic_data$gpa,
  main = "With Transparency",
  xlab = "Age", ylab = "GPA",
  pch = 16, col = rgb(1, 0, 0, alpha = 0.5)
)
# Restore the layout; otherwise every later plot is drawn in half the device.
par(old_par)
#> ggplot2 transparency:
#> ggplot(data, aes(x = age, y = gpa)) +
#> geom_point(alpha = 0.6)
# Linear regression with confidence interval: GPA against weekly study hours.
plot(academic_data$study_hours_week, academic_data$gpa,
  main = "Study Hours vs GPA with Regression",
  xlab = "Study Hours per Week", ylab = "GPA",
  pch = 16, col = rgb(0.3, 0.6, 0.8, 0.7)
)
lm_model <- lm(gpa ~ study_hours_week, data = academic_data)
abline(lm_model, col = "red", lwd = 3)
# Draw the 95% confidence band for the fitted mean as dashed lines. The
# previous version announced a confidence interval but drew only the line.
hours_grid <- data.frame(
  study_hours_week = seq(
    min(academic_data$study_hours_week, na.rm = TRUE),
    max(academic_data$study_hours_week, na.rm = TRUE),
    length.out = 100
  )
)
conf_band <- predict(lm_model,
  newdata = hours_grid,
  interval = "confidence", level = 0.95
)
lines(hours_grid$study_hours_week, conf_band[, "lwr"], col = "red", lty = 2)
lines(hours_grid$study_hours_week, conf_band[, "upr"], col = "red", lty = 2)
# Add R-squared
r_sq <- summary(lm_model)$r.squared
text(40, 3.5, paste("R² =", round(r_sq, 3)),
  col = "red", font = 2, cex = 1.2
)
#> ggplot2 regression:
#> ggplot(data, aes(x = study_hours_week, y = gpa)) +
#> geom_point() +
#> geom_smooth(method = 'lm', se = TRUE)
# Box plots with means: GPA distribution per faculty.
boxplot(
  formula = gpa ~ faculty,
  data = academic_data,
  main = "GPA by Faculty",
  col = rainbow(4, alpha = 0.7)
)
# Overlay the per-faculty mean as a red diamond on each box. Box k sits at
# x = k, and the rows of the aggregate are plotted in that order.
faculty_means <- aggregate(gpa ~ faculty, data = academic_data, FUN = mean)
points(
  x = seq_len(nrow(faculty_means)),
  y = faculty_means[["gpa"]],
  pch = 18, cex = 2, col = "red"
)
#>
#> ggplot2 box plots:
#> ggplot(data, aes(x = faculty, y = gpa)) +
#> geom_boxplot() +
#> stat_summary(fun = mean, geom = 'point', shape = 18, size = 4)
# Violin plot simulation: mirrored kernel-density outline of total score for
# each performance level, with a short horizontal bar at the median.
# Use the factor's level order (not order of appearance via unique()) so that
# the x positions, the axis labels and the colour vector below all line up.
performance_levels <- levels(academic_data$performance_level)
performance_colors <- c("red", "orange", "lightgreen", "darkgreen")
plot(1,
  type = "n",
  xlim = c(0.5, length(performance_levels) + 0.5),
  ylim = range(academic_data$total_score, na.rm = TRUE),
  main = "Score Distribution by Performance",
  xlab = "Performance Level", ylab = "Total Score",
  xaxt = "n"
)
for (i in seq_along(performance_levels)) {
  # which() drops NA comparisons, so a missing performance level cannot
  # inject NA rows into the subset.
  in_level <- which(academic_data$performance_level == performance_levels[i])
  perf_data <- academic_data$total_score[in_level]
  perf_data <- perf_data[!is.na(perf_data)] # density() rejects NA values
  # Skip levels with too few observations for a density estimate.
  if (length(perf_data) > 3) {
    density_est <- density(perf_data)
    # Rescale the density so the widest point spans 0.3 on either side.
    density_scaled <- density_est$y / max(density_est$y) * 0.3
    # Left outline going up, then the right outline coming back down.
    polygon(c(i - density_scaled, i + rev(density_scaled)),
      c(density_est$x, rev(density_est$x)),
      col = rgb(t(col2rgb(performance_colors[i])) / 255, alpha = 0.4),
      border = performance_colors[i]
    )
    # Add median line
    median_val <- median(perf_data)
    segments(i - 0.1, median_val, i + 0.1, median_val, lwd = 3)
  }
}
axis(1,
  at = seq_along(performance_levels),
  labels = performance_levels, las = 2
)
#> ggplot2 violin plots:
#> ggplot(data, aes(x = performance_level, y = total_score)) +
#> geom_violin() +
#> geom_boxplot(width = 0.1)
# Correlation heatmap of the main numeric variables.
numeric_vars <- c(
  "math_score", "physics_score", "gpa",
  "study_hours_week", "life_satisfaction"
)
cor_matrix <- cor(academic_data[, numeric_vars], use = "complete.obs")
# zlim is forwarded to image(). Fixing it to the full correlation range
# [-1, 1] centres the diverging palette on zero, so white means "no
# correlation". Without it the colours are stretched over the observed range
# and near-zero correlations can be drawn in red.
heatmap(cor_matrix,
  main = "Correlation Matrix",
  col = colorRampPalette(c("red", "white", "blue"))(50),
  symm = TRUE,
  zlim = c(-1, 1)
)
#>
#> ggplot2 heatmap:
#> library(reshape2)
#> cor_melted <- melt(cor_matrix)
#> ggplot(cor_melted, aes(x = Var1, y = Var2, fill = value)) +
#> geom_tile() +
#> scale_fill_gradient2(low = 'red', high = 'blue')
# Advanced annotations: total score against life satisfaction, with a fitted
# line, a highlighted top scorer and the correlation coefficient.
with(
  academic_data,
  plot(
    x = total_score, y = life_satisfaction,
    main = "Academic Performance vs Life Satisfaction",
    xlab = "Total Score", ylab = "Life Satisfaction",
    pch = 16, col = rgb(0.3, 0.6, 0.8, 0.6), cex = 1.1
  )
)
# Overlay the least-squares fit.
lm_sat <- lm(life_satisfaction ~ total_score, data = academic_data)
abline(lm_sat, col = "red", lwd = 2)
# Highlight the student with the highest total score (first one on ties).
top_performer <- academic_data[which.max(academic_data[["total_score"]]), ]
with(top_performer, {
  points(total_score, life_satisfaction, col = "green", pch = 17, cex = 2)
  # The label is placed half a unit above the marker.
  text(total_score, life_satisfaction + 0.5,
    "Top Performer",
    col = "green", font = 2
  )
})
# Print the Pearson correlation at a fixed position on the plot.
correlation <- with(academic_data, cor(total_score, life_satisfaction))
text(
  x = 100, y = 9,
  labels = paste("r =", round(correlation, 3)),
  col = "red", font = 2, cex = 1.2
)
#> ggplot2 annotations:
#> ggplot(data, aes(x = total_score, y = life_satisfaction)) +
#> geom_point() +
#> geom_smooth(method = 'lm') +
#> annotate('text', x = 100, y = 9, label = 'r = 0.45') +
#> annotate('point', x = max_score, y = max_sat, color = 'green')
# Directional arrows: study hours against stress, with an arrow pointing at
# one "efficient" student.
with(
  academic_data,
  plot(
    x = study_hours_week, y = stress_level,
    main = "Study Hours vs Stress (with Annotations)",
    xlab = "Study Hours per Week", ylab = "Stress Level",
    pch = 16, col = rgb(0.4, 0.6, 0.8, 0.7)
  )
)
# Efficient students: more than 40 study hours per week with stress below 3.
efficient <- academic_data[
  with(academic_data, study_hours_week > 40 & stress_level < 3),
]
# Annotate only the first match; draw nothing when no student qualifies.
if (nrow(efficient) > 0) {
  student <- efficient[1, ]
  # The arrow runs from a point up and to the left of the student to the
  # student's own position.
  arrows(
    x0 = student$study_hours_week - 5, y0 = student$stress_level + 1,
    x1 = student$study_hours_week, y1 = student$stress_level,
    col = "green", lwd = 2, length = 0.1
  )
  text(
    x = student$study_hours_week - 5, y = student$stress_level + 1.5,
    labels = "Efficient", col = "green", font = 2
  )
}
#>
#> ggplot2 arrows:
#> ggplot(data, aes(x = study_hours_week, y = stress_level)) +
#> geom_point() +
#> annotate('segment', x = 35, y = 4, xend = 42, yend = 2.5,
#> arrow = arrow(), color = 'green')
# Bu hissədə öyrəndiklərimiz:
# Növbəti hissədə: Themes, customization və publication-ready graphics
# Part 2-də themes və professional visualization öyrənəcəyik.